lstat vs medv

Plot: lstat vs medv

# Scatter plot of medv against lstat with a fitted linear-regression line.
ggplot(data = Boston, mapping = aes(x = lstat, y = medv)) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm")

Fit LM: lstat vs medv

# Simple linear regression of medv on lstat. Printing the fitted object
# shows the call and the estimated coefficients.
boston_lm_medv.lstat <- lm(medv ~ lstat, data = Boston)
boston_lm_medv.lstat

Call:
lm(formula = medv ~ lstat, data = Boston)

Coefficients:
(Intercept)        lstat  
      34.55        -0.95  
# Detailed fit summary: residual quantiles, coefficient table with standard
# errors and p-values, residual standard error, R-squared and F-statistic.
summary(boston_lm_medv.lstat)

Call:
lm(formula = medv ~ lstat, data = Boston)

Residuals:
    Min      1Q  Median      3Q     Max 
-15.168  -3.990  -1.318   2.034  24.500 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 34.55384    0.56263   61.41   <2e-16 ***
lstat       -0.95005    0.03873  -24.53   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.216 on 504 degrees of freedom
Multiple R-squared:  0.5441,    Adjusted R-squared:  0.5432 
F-statistic: 601.6 on 1 and 504 DF,  p-value: < 2.2e-16
# List the named components stored in the fitted lm object.
names(boston_lm_medv.lstat)
 [1] "coefficients"  "residuals"     "effects"       "rank"         
 [5] "fitted.values" "assign"        "qr"            "df.residual"  
 [9] "xlevels"       "call"          "terms"         "model"        

Confidence Interval

# Confidence intervals (2.5 % and 97.5 % bounds) for the intercept and
# the lstat slope.
confint(boston_lm_medv.lstat)
                2.5 %     97.5 %
(Intercept) 33.448457 35.6592247
lstat       -1.026148 -0.8739505

Broom’s Way

Predict

# Fitted medv at lstat = 5, 10 and 15 with confidence-interval bounds.
predict(
  boston_lm_medv.lstat,
  newdata = data.frame(lstat = c(5, 10, 15)),
  interval = "confidence"
)
       fit      lwr      upr
1 29.80359 29.00741 30.59978
2 25.05335 24.47413 25.63256
3 20.30310 19.73159 20.87461
# Fitted medv at lstat = 5, 10 and 15 with prediction-interval bounds.
predict(
  boston_lm_medv.lstat,
  newdata = data.frame(lstat = c(5, 10, 15)),
  interval = "prediction"
)
       fit       lwr      upr
1 29.80359 17.565675 42.04151
2 25.05335 12.827626 37.27907
3 20.30310  8.077742 32.52846

Multiple LM

medv ~ lstat + age

# Two-predictor model: medv regressed on lstat and age, then summarised.
boston_lm_medv.lstat_age <- lm(formula = medv ~ lstat + age, data = Boston)
boston_lm_medv.lstat_age %>%
  summary()

Call:
lm(formula = medv ~ lstat + age, data = Boston)

Residuals:
    Min      1Q  Median      3Q     Max 
-15.981  -3.978  -1.283   1.968  23.158 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 33.22276    0.73085  45.458  < 2e-16 ***
lstat       -1.03207    0.04819 -21.416  < 2e-16 ***
age          0.03454    0.01223   2.826  0.00491 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.173 on 503 degrees of freedom
Multiple R-squared:  0.5513,    Adjusted R-squared:  0.5495 
F-statistic:   309 on 2 and 503 DF,  p-value: < 2.2e-16
# Draw the lm diagnostic plots in a 2 x 2 grid, then put the previous
# graphics parameters back so later base plots are not forced into the grid.
old_par <- par(mfrow = c(2, 2))
plot(boston_lm_medv.lstat_age)
par(old_par)

All Predictors

# Regress medv on every other column of Boston (`.` stands for all
# remaining variables), then show the full summary.
boston_lm_medv_all <- lm(medv ~ ., data = Boston)
summary(boston_lm_medv_all)

Call:
lm(formula = medv ~ ., data = Boston)

Residuals:
    Min      1Q  Median      3Q     Max 
-15.595  -2.730  -0.518   1.777  26.199 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.646e+01  5.103e+00   7.144 3.28e-12 ***
crim        -1.080e-01  3.286e-02  -3.287 0.001087 ** 
zn           4.642e-02  1.373e-02   3.382 0.000778 ***
indus        2.056e-02  6.150e-02   0.334 0.738288    
chas         2.687e+00  8.616e-01   3.118 0.001925 ** 
nox         -1.777e+01  3.820e+00  -4.651 4.25e-06 ***
rm           3.810e+00  4.179e-01   9.116  < 2e-16 ***
age          6.922e-04  1.321e-02   0.052 0.958229    
dis         -1.476e+00  1.995e-01  -7.398 6.01e-13 ***
rad          3.060e-01  6.635e-02   4.613 5.07e-06 ***
tax         -1.233e-02  3.760e-03  -3.280 0.001112 ** 
ptratio     -9.527e-01  1.308e-01  -7.283 1.31e-12 ***
black        9.312e-03  2.686e-03   3.467 0.000573 ***
lstat       -5.248e-01  5.072e-02 -10.347  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4.745 on 492 degrees of freedom
Multiple R-squared:  0.7406,    Adjusted R-squared:  0.7338 
F-statistic: 108.1 on 13 and 492 DF,  p-value: < 2.2e-16

Since age and indus are not significant, we will remove them.

# Refit the all-predictor model without age and indus. In the update
# formula, `.` keeps the existing terms and `- age - indus` drops those two.
boston_lm_medv_allupd <- update(boston_lm_medv_all, ~ . - age - indus)
summary(boston_lm_medv_allupd)

Call:
lm(formula = medv ~ crim + zn + chas + nox + rm + dis + rad + 
    tax + ptratio + black + lstat, data = Boston)

Residuals:
     Min       1Q   Median       3Q      Max 
-15.5984  -2.7386  -0.5046   1.7273  26.2373 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  36.341145   5.067492   7.171 2.73e-12 ***
crim         -0.108413   0.032779  -3.307 0.001010 ** 
zn            0.045845   0.013523   3.390 0.000754 ***
chas          2.718716   0.854240   3.183 0.001551 ** 
nox         -17.376023   3.535243  -4.915 1.21e-06 ***
rm            3.801579   0.406316   9.356  < 2e-16 ***
dis          -1.492711   0.185731  -8.037 6.84e-15 ***
rad           0.299608   0.063402   4.726 3.00e-06 ***
tax          -0.011778   0.003372  -3.493 0.000521 ***
ptratio      -0.946525   0.129066  -7.334 9.24e-13 ***
black         0.009291   0.002674   3.475 0.000557 ***
lstat        -0.522553   0.047424 -11.019  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4.736 on 494 degrees of freedom
Multiple R-squared:  0.7406,    Adjusted R-squared:  0.7348 
F-statistic: 128.2 on 11 and 494 DF,  p-value: < 2.2e-16

Interaction

# 3-D view of medv over lstat and age. No trace type or mode is given, so
# plotly infers them and reports its choice (scatter3d, markers) as messages.
plotly::plot_ly(
  data = Boston,
  x = ~lstat,
  y = ~age,
  z = ~medv,
  size = 1
)
No trace type specified:
  Based on info supplied, a 'scatter3d' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
No scatter3d mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
No trace type specified:
  Based on info supplied, a 'scatter3d' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#scatter3d
No scatter3d mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
# Interaction model: `lstat * age` expands to lstat + age + lstat:age.
boston_lm_medv_lstat_i_age <- lm(formula = medv ~ lstat * age, data = Boston)
boston_lm_medv_lstat_i_age %>%
  summary()

Quadratic

# Scatter plot of medv against lstat with a quadratic least-squares curve.
# The smoothing formula is given as a formula object rather than a character
# string, which is the form geom_smooth() documents for `formula`.
Boston %>%
  ggplot(aes(lstat, medv)) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2))

# Quadratic fit of medv on lstat. I() makes `^` plain arithmetic squaring
# instead of a formula operator.
lm(formula = medv ~ lstat + I(lstat^2), data = Boston) %>%
  summary()

Call:
lm(formula = medv ~ lstat + I(lstat^2), data = Boston)

Residuals:
     Min       1Q   Median       3Q      Max 
-15.2834  -3.8313  -0.5295   2.3095  25.4148 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 42.862007   0.872084   49.15   <2e-16 ***
lstat       -2.332821   0.123803  -18.84   <2e-16 ***
I(lstat^2)   0.043547   0.003745   11.63   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.524 on 503 degrees of freedom
Multiple R-squared:  0.6407,    Adjusted R-squared:  0.6393 
F-statistic: 448.5 on 2 and 503 DF,  p-value: < 2.2e-16
LS0tCnRpdGxlOiAiTGluZWFyIE1vZGVsIC0gTGFiIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfa25pdCRzZXQocm9vdC5kaXIgPSBycHJvanJvb3Q6OmZpbmRfcnN0dWRpb19yb290X2ZpbGUoKSkgIyBTZXQgV0QgdG8gUm9vdApoZXJlOjppX2FtKCJsYWJfbW9kL2NoMy1sbS1sYWIuUm1kIikKbGlicmFyeShoZXJlKQpsaWJyYXJ5KElTTFIyKQpsaWJyYXJ5KE1BU1MpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGJyb29tKQpsaWJyYXJ5KHBsb3RseSkKYGBgCgpgYGB7cn0KaGVhZChCb3N0b24pCmBgYAoKIyBgbHN0YXRgIHZzIGBtZWR2YAoKIyMgUGxvdDogYGxzdGF0YCB2cyBgbWVkdmAKCgpgYGB7cn0KQm9zdG9uICU+JSAKICBnZ3Bsb3QoYWVzKGxzdGF0LCBtZWR2KSkgKwogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjQpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iKQpgYGAKCiMjIEZpdCBMTTogYGxzdGF0YCB2cyBgbWVkdmAKCmBgYHtyfQpib3N0b25fbG1fbWVkdi5sc3RhdCA8LSBsbShtZWR2IH4gbHN0YXQsIEJvc3RvbikKYm9zdG9uX2xtX21lZHYubHN0YXQKYGBgCgpgYGB7cn0Kc3VtbWFyeShib3N0b25fbG1fbWVkdi5sc3RhdCkKYGBgCmBgYHtyfQpuYW1lcyhib3N0b25fbG1fbWVkdi5sc3RhdCkKYGBgCgpDb25maWRlbmNlIEludGVydmFsCgpgYGB7cn0KY29uZmludChib3N0b25fbG1fbWVkdi5sc3RhdCkKYGBgCgojIyMgQnJvb20ncyBXYXkKCmBgYHtyfQpicm9vbTo6dGlkeShib3N0b25fbG1fbWVkdi5sc3RhdCkKYGBgCgpgYGB7cn0KYnJvb206OmF1Z21lbnQoYm9zdG9uX2xtX21lZHYubHN0YXQpCmBgYAoKIyMjIFByZWRpY3QKCmBgYHtyfQpwcmVkaWN0KGJvc3Rvbl9sbV9tZWR2LmxzdGF0LCBkYXRhLmZyYW1lKGxzdGF0ID0gKGMoNSwgMTAsIDE1KSkpLAogICAgaW50ZXJ2YWwgPSAiY29uZmlkZW5jZSIpCmBgYAoKYGBge3J9CnByZWRpY3QoYm9zdG9uX2xtX21lZHYubHN0YXQsIGRhdGEuZnJhbWUobHN0YXQgPSAoYyg1LCAxMCwgMTUpKSksCiAgICBpbnRlcnZhbCA9ICJwcmVkaWN0aW9uIikKYGBgCgoKIyBNdWx0aXBsZSBMTQoKIyMgYG1lZHYgfiBsc3RhdCArIGFnZWAKCmBgYHtyfQpib3N0b25fbG1fbWVkdi5sc3RhdF9hZ2UgPC0gbG0obWVkdiB+IGxzdGF0ICsgYWdlLCBkYXRhID0gQm9zdG9uKQpzdW1tYXJ5KGJvc3Rvbl9sbV9tZWR2LmxzdGF0X2FnZSkKYGBgCgpgYGB7cn0KcGFyKG1mcm93ID0gYygyLDIpKQpwbG90KGJvc3Rvbl9sbV9tZWR2LmxzdGF0X2FnZSkKYGBgCiMjIEFsbCBQcmVkaWN0b3JzCgpgYGB7cn0KYm9zdG9uX2xtX21lZHZfYWxsIDwtIGxtKG1lZHYgfiAuLCBkYXRhID0gQm9zdG9uICkKc3VtbWFyeShib3N0b25fbG1fbWVkdl9hbGwpCmBgYApTaW5jZSBgYWdlYCBhbmQgYGluZHVzYCBpcyBub3Qgc2lnbmlmaWNhbnQsIHdlIHdpbGwgcmVtb3ZlIHRoZW0uCgpgYGB7cn0KYm9zdG9uX2xtX21lZHZf
YWxsdXBkIDwtIHVwZGF0ZShib3N0b25fbG1fbWVkdl9hbGwsIH4uIC1hZ2UtaW5kdXMpCgpzdW1tYXJ5KGJvc3Rvbl9sbV9tZWR2X2FsbHVwZCkKYGBgCgojIEludGVyYWN0aW9uCgoKYGBge3J9CkJvc3RvbiAlPiUgCiAgcGxvdGx5OjpwbG90X2x5KHggPSB+bHN0YXQsIHkgPSB+YWdlLCB6ID0gfm1lZHYsIHNpemUgPSAxKQpgYGAKCgpgYGB7cn0KYm9zdG9uX2xtX21lZHZfbHN0YXRfaV9hZ2UgPC0gbG0obWVkdiB+IGxzdGF0KmFnZSwgQm9zdG9uKQoKc3VtbWFyeShib3N0b25fbG1fbWVkdl9sc3RhdF9pX2FnZSkKYGBgCgojIyBRdWFkcmF0aWMKCmBgYHtyfQpCb3N0b24gJT4lIAogIGdncGxvdChhZXMobHN0YXQsIG1lZHYpKSArCiAgZ2VvbV9wb2ludChhbHBoYSA9IDAuNCkgKyAKICBnZW9tX3Ntb290aChmb3JtdWxhID0gInkgfiB4ICsgSSh4XjIpIiwgbWV0aG9kID0gImxtIikKYGBgCgoKYGBge3J9CmxtKG1lZHYgfiBsc3RhdCArIEkobHN0YXReMiksIEJvc3RvbikgJT4lIHN1bW1hcnkoKQpgYGAKCg==